import os
import numpy as np
import pandas as pd
from glob import glob
from pathlib import Path
# image
import cv2
from skimage.io import imread
# TensorFlow
import tensorflow as tf
from tensorflow.keras import layers, models
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## progressbar
from tqdm import tqdm
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## maps
import folium
from folium import plugins
from folium.plugins import HeatMap
## seaborn
import seaborn as sns
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
from matplotlib.font_manager import FontProperties
import matplotlib.colors as mcolors
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib import cm
plt.style.use('seaborn-whitegrid')
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
def Header(Text, L = 100, C = 'Blue', T = 'White'):
    """Print Text on a colored background followed by a colored '=' ruler padding the row to L chars."""
    palette = ['Black', 'Red', 'Green', 'Yellow', 'Blue', 'Magenta', 'Cyan']
    BACK = {name: getattr(Back, name.upper()) for name in palette}
    FORE = {name: getattr(Fore, name.upper()) for name in palette + ['White']}
    banner = BACK[C] + FORE[T] + Style.NORMAL + Text + Style.RESET_ALL
    ruler = FORE[C] + Style.NORMAL + (L - len(Text) - 1) * '=' + Style.RESET_ALL
    print(banner + ' ' + ruler)
def Line(L=100, C = 'Blue'):
    """Print a horizontal rule of L '=' characters in the foreground color C."""
    FORE = {name: getattr(Fore, name.upper())
            for name in ('Black', 'Red', 'Green', 'Yellow', 'Blue', 'Magenta', 'Cyan', 'White')}
    print(FORE[C] + Style.NORMAL + '=' * L + Style.RESET_ALL)
Start_Path = 'images_hur'
Target = 'Damage'
# Collect every *.jpg / *.jpeg under Start_Path into one frame of Paths.
Data = pd.DataFrame({'Path': list(Path(Start_Path).glob('**/*.jp*g'))})
# x.name is portable; the original str(x).split('\\')[-1] only worked with Windows separators.
Data['File'] = Data['Path'].map(lambda x: x.name)
# Class label comes from the parent folder name (e.g. 'no_damage' -> 'No Damage').
Data[Target] = Data['Path'].map(lambda x: x.parent.stem.title().replace('_', ' '))
# Dataset split comes from the grandparent folder (train_another / test / ...).
Data['Dataset'] = Data['Path'].map(lambda x: x.parent.parent.stem)
Data = Data.loc[Data['Dataset'] != 'test_another']
Data['Dataset'] = Data['Dataset'].map(lambda x: x.title().replace('_Another', ''))
# NOTE(review): file stems look like '<lon>_<lat>.jpeg' (values near -95 can only be
# longitudes in this region), so these two columns appear swapped — kept as-is because
# the downstream scatter axis limits depend on the current assignment; verify upstream.
Data['Latitude'] = Data['Path'].map(lambda x: x.stem.split('_')[0]).astype('float')
Data['Longitude'] = Data['Path'].map(lambda x: x.stem.split('_')[1]).astype('float')
#
Header('A Sample of the Dataframe')
Data = Data.reindex(sorted(Data.columns), axis=1)
display(Data.sample(10))
# NOTE(review): Styler.hide_index was removed in pandas 2.0 (use .hide(axis='index')); kept for compatibility.
display(pd.DataFrame({'Number of Instances': [Data.shape[0]],
                      'Number of Attributes': [Data.shape[1]]}).style.hide_index())
#
def Path_Tree(startpath, Extension, sep = ' ' * 3, n = 5):
    """Print a colorized directory tree of *startpath* and return the folders
    that contain files ending in *Extension*.

    Parameters
    ----------
    startpath : str — root directory to walk.
    Extension : str — file suffix (e.g. '.jpeg') used to flag image folders.
    sep : str — indentation unit per tree level.
    n : int — number of example file names to print per matching folder.
    """
    Folders_with_Images = []
    # One background color per tree level; repeated so the list is long enough
    # for any realistic depth.
    C = ['Red', 'Green', 'Magenta', 'Cyan']*len(os.listdir(startpath))
    BACK = {'Black': Back.BLACK, 'Red':Back.RED, 'Green':Back.GREEN,
            'Yellow': Back.YELLOW, 'Blue': Back.BLUE,
            'Magenta':Back.MAGENTA, 'Cyan': Back.CYAN}
    for root, _, files in os.walk(startpath):
        # Depth of this folder relative to startpath (0 = the root itself).
        level = root.replace(startpath, '').count(os.sep)
        if level >0:
            indent = sep* (level)+ '└──'
            print(indent + BACK[C[level]] + Fore.BLACK + Style.NORMAL + os.path.basename(root) + Style.RESET_ALL)
        else:
            # Root folder: print its name framed between '=' rulers.
            title = os.path.basename(root)
            print(Style.RESET_ALL + Fore.BLUE + Style.NORMAL + '=' * (len(title) +1) + Style.RESET_ALL)
            print(Back.BLACK + Fore.CYAN + Style.NORMAL + title + Style.RESET_ALL)
            print(Style.RESET_ALL + Fore.BLUE + Style.NORMAL + '=' * (len(title) +1)+ Style.RESET_ALL)
        subindent = ' ' * 4 * (level + 1)  # NOTE(review): computed but never used
        # Only the first file is inspected: if it matches Extension the folder is
        # recorded and a one-line summary with up to n example names is printed.
        for file in files[:1]:
            if file.endswith(Extension):
                Folders_with_Images.append(root)
                List = os.listdir(root)
                print(level* sep, Fore.BLUE + Style.NORMAL +
                      '%i %s files: ' % (len(List), List[0].split('.')[-1].upper()) + Style.RESET_ALL +
                      '%s'%', '.join(List[:n]) + ', ...')
    return Folders_with_Images
_ = Folders_with_Images = Path_Tree(Start_Path, '.jpeg', n = 3)
A Sample of the Dataframe ==========================================================================
| Damage | Dataset | File | Latitude | Longitude | Path | |
|---|---|---|---|---|---|---|
| 11775 | Damage | Train | -95.08003599999999_29.828667.jpeg | -95.080036 | 29.828667 | images_hur\train_another\damage\-95.0800359999... |
| 13639 | Damage | Train | -95.595192_29.765008.jpeg | -95.595192 | 29.765008 | images_hur\train_another\damage\-95.595192_29.... |
| 21968 | Damage | Validation | -96.968689_28.816515000000003.jpeg | -96.968689 | 28.816515 | images_hur\validation_another\damage\-96.96868... |
| 14453 | Damage | Train | -95.632126_29.775575.jpeg | -95.632126 | 29.775575 | images_hur\train_another\damage\-95.632126_29.... |
| 21184 | Damage | Validation | -95.086515_29.827463.jpeg | -95.086515 | 29.827463 | images_hur\validation_another\damage\-95.08651... |
| 19126 | No Damage | Train | -95.635625_29.844209999999997.jpeg | -95.635625 | 29.844210 | images_hur\train_another\no_damage\-95.635625_... |
| 11156 | Damage | Train | -93.742164_30.122063.jpeg | -93.742164 | 30.122063 | images_hur\train_another\damage\-93.742164_30.... |
| 19351 | No Damage | Train | -95.638134_29.838812.jpeg | -95.638134 | 29.838812 | images_hur\train_another\no_damage\-95.638134_... |
| 35 | Damage | Test | -93.734053_29.788496000000002.jpeg | -93.734053 | 29.788496 | images_hur\test\damage\-93.734053_29.788496000... |
| 22264 | No Damage | Validation | -95.276788_29.6097.jpeg | -95.276788 | 29.609700 | images_hur\validation_another\no_damage\-95.27... |
| Number of Instances | Number of Attributes |
|---|---|
| 14000 | 6 |
=========== images_hur =========== └──test └──damage 1000 JPEG files: -93.548123_30.900623.jpeg, -93.560128_30.894917.jpeg, -93.578271_30.779923999999998.jpeg, ... └──no_damage 1000 JPEG files: -95.061894_30.007746.jpeg, -95.061936_29.828088.jpeg, -95.062123_30.056714000000003.jpeg, ... └──test_another └──damage 8000 JPEG files: -93.528502_30.987438.jpeg, -93.5302_30.988157.jpeg, -93.53950999999999_30.982944.jpeg, ... └──no_damage 1000 JPEG files: -95.062321_30.060401000000002.jpeg, -95.062533_30.055047.jpeg, -95.062589_29.830265.jpeg, ... └──train_another └──damage 5000 JPEG files: -93.55964_30.895018.jpeg, -93.573763_30.693981.jpeg, -93.578271_30.779923999999998.jpeg, ... └──no_damage 5000 JPEG files: -95.061275_29.831535.jpeg, -95.061412_29.826522999999998.jpeg, -95.061825_29.828608000000003.jpeg, ... └──validation_another └──damage 1000 JPEG files: -93.558326_30.895248.jpeg, -93.563851_30.894492.jpeg, -93.57315600000001_30.994087.jpeg, ... └──no_damage 1000 JPEG files: -95.061894_30.007746.jpeg, -95.062123_30.056714000000003.jpeg, -95.062218_30.055870000000002.jpeg, ...
def DatasetDist(Inp, Target, PD):
    """Show the class distribution of Inp[Target] as a count/percentage table (left)
    next to a donut chart (right).

    Parameters
    ----------
    Inp : DataFrame — the dataset slice to summarize.
    Target : str — name of the label column.
    PD : dict — plot parameters: 'PieColors' (label -> color), 'TableColors',
        'hole', 'column_widths', 'textfont', 'height', 'tablecolumnwidth',
        'pull', 'legend_title', 'legend_orientation', 'Title', 'title_x', 'title_y'.
    """
    Table = Inp[Target].value_counts().to_frame('Count').reset_index(drop = False).rename(columns = {'index':Target})
    Table = Table.sort_values(by = [Target])
    Table['Percentage'] = np.round(100*(Table['Count']/Table['Count'].sum()), 2)
    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02, column_widths=PD['column_widths'],
                        specs=[[{"type": "table"},{"type": "pie"}]])
    # Fixed: the original ignored PD['PieColors'] and silently read the global
    # Colors_dict; fall back to the global only when the key is absent.
    color_map = PD['PieColors'] if 'PieColors' in PD else Colors_dict
    # Right: donut chart of class counts.
    fig.add_trace(go.Pie(labels=Table[Target].values, values=Table['Count'].values,
                         pull=PD['pull'], textfont=dict(size= PD['textfont']),
                         marker=dict(colors = [color_map[x] for x in Table[Target].unique()],
                                     line=dict(color='black', width=1))), row=1, col=2)
    fig.update_traces(hole=PD['hole'])
    fig.update_layout(height = PD['height'], legend=dict(orientation=PD['legend_orientation']),
                      legend_title_text= PD['legend_title'])
    # Left: the same numbers as a table ('%' prefixed percentages).
    T = Table.copy()
    T['Percentage'] = T['Percentage'].map(lambda x: '%%%.2f' % x)
    Temp = []
    for i in T.columns:
        Temp.append(T.loc[:,i].values)
    fig.add_trace(go.Table(header=dict(values = list(Table.columns), line_color='darkslategray',
                                       fill_color= PD['TableColors'][0], align=['center','center'],
                                       font=dict(color='white', size=12), height=25), columnwidth = PD['tablecolumnwidth'],
                           cells=dict(values=Temp, line_color='darkslategray',
                                      fill=dict(color= [PD['TableColors'][1], PD['TableColors'][1]]),
                                      align=['center','center', 'center'], font_size=12, height=20)), 1, 1)
    fig.update_layout(title={'text': '<b>' + PD['Title'] + '<b>', 'x':PD['title_x'],
                             'y':PD['title_y'], 'xanchor': 'center', 'yanchor': 'top'})
    fig.show()
# Map each class label to a fixed color used throughout the notebook.
Colors_dict = dict(zip(Data[Target].unique().tolist(), ['DarkRed', 'DarkGreen']))
# All pie slices flush except the last, which is pulled out slightly.
Pull = [0] * (len(Data[Target].unique().tolist()) - 1)
Pull.append(.05)
PD = dict(PieColors = Colors_dict, TableColors = ['DarkGreen','GhostWhite'], hole = .4,
          column_widths=[0.5, 0.5], textfont = 14, height = 350,
          tablecolumnwidth = [.1, .05, .08], pull = Pull,
          legend_title = Target, legend_orientation = 'v',
          Title ='Train Set', title_x = 0.5, title_y = 0.84)
del Pull
# Class distribution for the train split, then (restyled colors/title) the test split.
DatasetDist(Data.loc[Data['Dataset'] == 'Train'], Target = Target, PD = PD)
PD.update(dict(TableColors = ['Indigo','GhostWhite'], Title ='Test Set'))
DatasetDist(Data.loc[Data['Dataset'] == 'Test'], Target = Target, PD = PD)
# Show a 4x5 grid of random train images, each titled with its class label.
fig, ax = plt.subplots(4, 5, figsize=(12, 12))
_ = fig.suptitle('A Sample of Train Dataset', fontweight='bold', fontsize=18)
ax = ax.ravel()
train_sample = (Data.loc[Data['Dataset'] == 'Train']
                .sample(len(ax))
                .reset_index(drop=True))
for idx, record in train_sample.iterrows():
    panel = ax[idx]
    _ = panel.imshow(imread(record['Path']))
    _ = panel.set_title('%s' % record[Target], fontweight='bold', fontsize=12,
                        color=Colors_dict[record[Target]])
    _ = panel.axis("off")
    _ = panel.set_aspect(1)
fig.tight_layout()
# Scatter the image coordinates, colored by dataset split and styled by class.
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
_ = sns.scatterplot(data=Data, x='Latitude', y='Longitude', hue='Dataset',
                    style='Damage', ax=ax, s=50, alpha=0.5)
_ = ax.set_xlabel('Latitude')
_ = ax.set_ylabel('Longitude')
_ = ax.grid(False)
_ = ax.set_xlim([-98, -93])
_ = ax.set_ylim([28, 32])
_ = ax.set_title('Image Location by Damage and Dataset Group',
                 weight='bold', fontsize=16)
batch_size = 128
# Infer the model input size from one sample image (all tiles share the same shape).
# .iloc is positional; the original Data['Path'][0] label lookup breaks if index
# label 0 was dropped by the earlier 'test_another' filter.
(Img_Height, Img_Width, _) = imread(Data['Path'].iloc[0]).shape
Header('Train Images')
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    # os.path.join is portable; the hard-coded '\\' path only worked on Windows.
    directory=os.path.join(Start_Path, 'train_another'),
    shuffle=True,
    seed=123,
    image_size=(Img_Height, Img_Width),
    batch_size=batch_size)
Header('Validation Images Data Generator', C = 'Green')
# NOTE(review): the 'test' folder is being used as the validation set — confirm intent.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=os.path.join(Start_Path, 'test'),
    image_size=(Img_Height, Img_Width),
    batch_size=batch_size)
Line()
Train Images ======================================================================================= Found 10000 files belonging to 2 classes. Validation Images Data Generator =================================================================== Found 2000 files belonging to 2 classes. ====================================================================================================
The model below is a convolutional neural network (CNN), a class of feedforward artificial neural network (ANN). At each training iteration the SparseCategoricalCrossentropy loss is computed, its gradient is calculated, and the model weights are updated. Through this iterative process the predictions come into closer agreement with the true labels, since the error decreases from its value at the first step.
num_classes = len(Data[Target].unique())
# On-the-fly augmentation: random horizontal flips plus small rotations and zooms.
data_augmentation = tf.keras.Sequential([
    layers.experimental.preprocessing.RandomFlip("horizontal",
                                                 input_shape=(Img_Height, Img_Width, 3)),
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1),
])
# Three Conv/Pool stages followed by a dense head that outputs raw logits.
model_layers = [data_augmentation,
                layers.experimental.preprocessing.Rescaling(1./255,
                                                            input_shape=(Img_Height, Img_Width, 3))]
for n_filters in (16, 32, 64):
    model_layers.append(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    model_layers.append(layers.MaxPooling2D())
model_layers.append(layers.Flatten())
model_layers.append(layers.Dense(128, activation='relu'))
model_layers.append(layers.Dense(num_classes))
model = models.Sequential(name = 'Multi_Class_MLP', layers = model_layers)
model.summary()
tf.keras.utils.plot_model(model, show_shapes=True, show_dtype=True,
                          show_layer_names=True, expand_nested = False, rankdir= 'LR')
Model: "Multi_Class_MLP" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= sequential (Sequential) (None, 128, 128, 3) 0 _________________________________________________________________ rescaling (Rescaling) (None, 128, 128, 3) 0 _________________________________________________________________ conv2d (Conv2D) (None, 128, 128, 16) 448 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 64, 64, 16) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 64, 64, 32) 4640 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 32, 32, 32) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 32, 32, 64) 18496 _________________________________________________________________ max_pooling2d_2 (MaxPooling2 (None, 16, 16, 64) 0 _________________________________________________________________ flatten (Flatten) (None, 16384) 0 _________________________________________________________________ dense (Dense) (None, 128) 2097280 _________________________________________________________________ dense_1 (Dense) (None, 2) 258 ================================================================= Total params: 2,121,122 Trainable params: 2,121,122 Non-trainable params: 0 _________________________________________________________________
Compiling and fitting the model
# Maximum number of training epochs (early stopping may end training sooner).
IT = 21
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
# Train quietly; keep the weights from the best validation epoch.
early_stop = tf.keras.callbacks.EarlyStopping(patience=4, restore_best_weights=True)
history = model.fit(train_ds, validation_data=val_ds, epochs=IT, verbose = 0,
                    callbacks=[early_stop])
def Search_List(Key, List): return [s for s in List if Key in s]
Metrics_Names = dict(zip(model.metrics_names, [x.replace('_',' ').title() for x in model.metrics_names]))
def Table_modify(df, Metrics_Names = Metrics_Names):
    """Rename metric columns to display names, sort the columns alphabetically,
    and prepend a 0-based 'Iteration' counter column.

    NOTE: the default mapping is captured from the global Metrics_Names at
    definition time.
    """
    renamed = df.rename(columns = Metrics_Names)
    renamed = renamed[sorted(renamed.columns)]
    renamed.insert(loc = 0, column = 'Iteration',
                   value = np.arange(0, renamed.shape[0]), allow_duplicates=False)
    return renamed
# Split the history keys into validation ('val_' prefix) and train groups,
# then turn each group into a tidy per-epoch table.
val_keys = Search_List('val_', history.history.keys())
train_keys = list(set(history.history.keys()) - set(val_keys))
Validation_Table = pd.DataFrame(np.array([history.history[k] for k in val_keys]).T,
                                columns = val_keys)
Train_Table = pd.DataFrame(np.array([history.history[k] for k in train_keys]).T,
                           columns = train_keys)
# Strip the 'val_' prefix so both tables share the same column names.
Validation_Table.columns = [c.replace('val_', '') for c in Validation_Table.columns]
Train_Table = Table_modify(Train_Table)
Validation_Table = Table_modify(Validation_Table)
# Train Set Score
score = model.evaluate(train_ds, batch_size = batch_size, verbose = 0)
score = pd.DataFrame(score, index = model.metrics_names).T
score.index = ['Train Set Score']
# Validation Set Score
Temp = model.evaluate(val_ds, batch_size = batch_size, verbose = 0)
Temp = pd.DataFrame(Temp, index = model.metrics_names).T
Temp.index = ['Validation Set Score']
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
# (and has worked identically here since pandas 0.x).
score = pd.concat([score, Temp])
score.rename(columns= Metrics_Names, inplace = True)
score = score.reindex(sorted(score.columns), axis=1)
# NOTE(review): Styler.set_precision is deprecated in newer pandas (use .format); kept for compatibility.
display(score.style.set_precision(4))
| Accuracy | Loss | |
|---|---|---|
| Train Set Score | 0.9468 | 0.1306 |
| Validation Set Score | 0.9455 | 0.1404 |
def Plot_history(history, PD, Title = False, metrics_names = list(Metrics_Names.values())):
    """Plot per-iteration metric curves (left) next to a sampled history table (right).

    Parameters
    ----------
    history : DataFrame from Table_modify — an 'Iteration' column plus one column
        per metric in metrics_names.
    PD : dict — 'yLim' (y-axis cap), 'Table_Rows' (None = show every row),
        'tablecolumnwidth', 'TableColors' ([line/fill, text]).
    Title : str or False — optional figure title; False suppresses it.
    metrics_names : list — metric column names; NOTE the default is captured from
        the global Metrics_Names at definition time.
    """
    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02, column_widths=[0.6, 0.4],
                        specs=[[{"type": "scatter"},{"type": "table"}]])
    # Left: one line per metric.
    Colors = ['OrangeRed', 'MidnightBlue', 'purple']
    for j in range(len(metrics_names)):
        fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history[metrics_names[j]].values,
                                 line=dict(color=Colors[j], width= 1.5), name = metrics_names[j]), 1, 1)
    fig.update_layout(legend=dict(x=0, y=1.1, traceorder='reversed', font_size=12),
                      dragmode='select', plot_bgcolor= 'white', height=600, hovermode='closest',
                      legend_orientation='h')
    fig.update_xaxes(range=[history.Iteration.min(), history.Iteration.max()],
                     showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    fig.update_yaxes(range=[0, PD['yLim']], showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    # Right: down-sample to Table_Rows evenly spaced epochs plus the final one.
    if PD['Table_Rows'] is not None:  # fixed: was the non-idiomatic 'not ... == None'
        ind = np.linspace(0, history.shape[0], PD['Table_Rows'], endpoint = False).round(0).astype(int)
        ind = np.append(ind, history.index[-1])
        history = history[history.index.isin(ind)]
    T = history.copy()
    T[metrics_names] = T[metrics_names].applymap(lambda x: '%.4e' % x)
    # One array of cell values per column, in column order.
    Temp = [T.loc[:, col].values for col in T.columns]
    TableColors = PD['TableColors']
    fig.add_trace(go.Table(header=dict(values = list(history.columns), line_color=TableColors[0],
        fill_color=TableColors[0], align=['center','center'], font=dict(color=TableColors[1], size=12), height=25),
        columnwidth = PD['tablecolumnwidth'], cells=dict(values=Temp, line_color=TableColors[0],
        fill=dict(color=[TableColors[1], TableColors[1]]),
        align=['center', 'center'], font_size=12,height=20)), 1, 2)
    if Title != False:
        fig.update_layout(plot_bgcolor= 'white',
                          title={'text': Title, 'x':0.46, 'y':0.94, 'xanchor': 'center', 'yanchor': 'top'},
                          yaxis_title='Frequency')
    fig.show()
# History-plot parameters: 25 sampled table rows, metric axis capped at 1.
PD = dict(Table_Rows=25, yLim=1, tablecolumnwidth=[0.3, 0.4, 0.4],
          TableColors=['DarkSlateGray', 'White'])
Plot_history(Train_Table, Title='Train Set', PD=PD)
Plot_history(Validation_Table, Title='Validation Set', PD=PD)
# Collect the validation images and labels once.
# NOTE(review): val_ds was built with the default shuffle=True, so iterating it here
# may not match the order model.predict(val_ds) saw below — verify, or rebuild the
# dataset with shuffle=False before this cell.
val_images = []
val_labels = []
for x, y in val_ds:  # iterate directly; no need to materialize with list()
    val_images.extend(x)
    val_labels.extend(y)
Pred = model.predict(val_ds)
class_names = val_ds.class_names
class_names = [x.title() for x in class_names]
# Softmax all rows at once.  The original per-row loop both duplicated row 0 and
# dropped the last row: it iterated range(len(Pred[1:])) but indexed Pred[i],
# covering Pred[0]..Pred[n-2] on top of an initial Pred[0] column.
Prob = np.array(tf.nn.softmax(Pred, axis=-1))
Pred = pd.DataFrame(data = Prob, columns = class_names)
Pred['Actual Label'] = [class_names[x] for x in val_labels]
100%|███████████████████████████████████████████████████████████████████████████| 1999/1999 [00:00<00:00, 28550.85it/s]
display(Pred.round(2))
| Damage | No_Damage | Actual Label | |
|---|---|---|---|
| 0 | 0.00 | 1.00 | Damage |
| 1 | 0.00 | 1.00 | Damage |
| 2 | 1.00 | 0.00 | No_Damage |
| 3 | 0.00 | 1.00 | No_Damage |
| 4 | 0.68 | 0.32 | No_Damage |
| ... | ... | ... | ... |
| 1995 | 0.00 | 1.00 | Damage |
| 1996 | 0.00 | 1.00 | Damage |
| 1997 | 0.99 | 0.01 | No_Damage |
| 1998 | 0.00 | 1.00 | No_Damage |
| 1999 | 0.00 | 1.00 | Damage |
2000 rows × 3 columns